In [296]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import chart_studio.plotly as py
from IPython.display import IFrame
from datetime import datetime
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
%matplotlib inline
In [2]:
confirmed_cases_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
deaths_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
cured_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"
In [3]:
confirmed_cases = pd.read_csv(confirmed_cases_path)
confirmed_cases.head()
Out[3]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 NaN Thailand 15.0000 101.0000 2 3 5 7 8 8 ... 50 53 59 70 75 82 114 147 177 212
1 NaN Japan 36.0000 138.0000 2 1 2 2 4 4 ... 511 581 639 639 701 773 839 825 878 889
2 NaN Singapore 1.2833 103.8333 0 1 3 3 4 5 ... 150 160 178 178 200 212 226 243 266 313
3 NaN Nepal 28.1667 84.2500 0 0 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
4 NaN Malaysia 2.5000 112.5000 0 0 0 3 4 4 ... 117 129 149 149 197 238 428 566 673 790

5 rows × 61 columns

In [4]:
deaths_data = pd.read_csv(deaths_path)
deaths_data.head()
Out[4]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 NaN Thailand 15.0000 101.0000 0 0 0 0 0 0 ... 1 1 1 1 1 1 1 1 1 1
1 NaN Japan 36.0000 138.0000 0 0 0 0 0 0 ... 10 10 15 16 19 22 22 27 29 29
2 NaN Singapore 1.2833 103.8333 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 NaN Nepal 28.1667 84.2500 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 NaN Malaysia 2.5000 112.5000 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 2 2

5 rows × 61 columns

In [5]:
recovered_cases = pd.read_csv(cured_path)
recovered_cases.head()
Out[5]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 NaN Thailand 15.0000 101.0000 0 0 0 0 2 2 ... 31 33 34 34 35 35 35 35 41 42
1 NaN Japan 36.0000 138.0000 0 0 0 0 1 1 ... 76 101 118 118 118 118 118 144 144 144
2 NaN Singapore 1.2833 103.8333 0 0 0 0 0 0 ... 78 78 96 96 97 105 105 109 114 114
3 NaN Nepal 28.1667 84.2500 0 0 0 0 0 0 ... 1 1 1 1 1 1 1 1 1 1
4 NaN Malaysia 2.5000 112.5000 0 0 0 0 0 0 ... 24 24 26 26 26 35 42 42 49 60

5 rows × 61 columns

Let us see the cumulative growth of number of cases per each day over the entire world

In [6]:
days_columns = confirmed_cases.columns[4:]
In [7]:
world_cases_growth = confirmed_cases[days_columns].sum(axis = 0).reset_index()
world_cases_growth.columns = ['Date', 'Count']
In [8]:
def isweekend(date):
    return datetime.isoweekday(pd.to_datetime(date)) > 5
world_cases_growth['isweekend'] = world_cases_growth['Date'].apply(isweekend).astype(int)
In [9]:
plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['Count'], '--ro')
plt.plot(world_cases_growth['Date'], world_cases_growth['Count'], '--bo')
plt.title("Spread of virus per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.yticks(world_cases_growth['Count'])
plt.show()
Sudden spike can be seen from 12-Feb to 13-Feb almost 15000 cases were identified in just a single day . But most of them could be suspected cases.

Delta difference from day to day confirmed cases

In [10]:
world_cases_growth['delta_confirmed'] = world_cases_growth['Count'].sub(world_cases_growth['Count'].shift())
world_cases_growth.loc[0, 'delta_confirmed'] = world_cases_growth.loc[0, 'Count']
In [11]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--yo', color = world_cases_growth['isweekend'])
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'delta_confirmed']+2, 
             world_cases_growth.loc[i,'delta_confirmed'].astype(int))
plt.title("Delta new cases of to Covid-19 identified on each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()
C:\Users\Amey\AppData\Roaming\Python\Python36\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning:

stem() got an unexpected keyword argument 'color'. This will raise a TypeError in future versions.

Parallelly let us compare the active cases to # of deaths on each day

In [12]:
world_cases_growth['deaths'] = deaths_data[days_columns].sum(axis = 0).reset_index(drop = True)
In [13]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'deaths']+50, 
             world_cases_growth.loc[i,'deaths'])
plt.title("Growth of death toll due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()

But the above representation is a cumulative sum of previous day count and newly added count, We can look at the delta change for every day.

In [14]:
world_cases_growth['delta_deaths'] = world_cases_growth['deaths'].sub(world_cases_growth['deaths'].shift())
world_cases_growth.loc[0, 'delta_deaths'] = world_cases_growth.loc[0, 'deaths']
In [15]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'delta_deaths']+2, 
             world_cases_growth.loc[i,'delta_deaths'].astype(int))
plt.title("# of deaths due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()

Let us also see how many cases were cured on daily basis

In [16]:
world_cases_growth['recovered'] = recovered_cases[days_columns].sum(axis = 0).reset_index(drop = True)
In [17]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['recovered'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'recovered']+50, 
             world_cases_growth.loc[i,'recovered'])
plt.title("# of people that recovered from Covid-19 virus")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()
In [18]:
plt.rcParams['figure.figsize'] = [20, 10]
plt.figure(1)
plt.subplot(311)
plt.plot(world_cases_growth['Date'], world_cases_growth['Count'], '--bo')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()

plt.subplot(312)
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()

plt.subplot(313)
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go')

plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()
plt.show()

Stacked Bar plot of confirmed, recovered and death counts on daily bases

In [108]:
p1 = plt.bar(world_cases_growth['Date'], world_cases_growth['Count'], color = 'yellow')
p2 = plt.bar(world_cases_growth['Date'], world_cases_growth['recovered'], color='g')
p3 = plt.bar(world_cases_growth['Date'], world_cases_growth['deaths'], color='r')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend([p1[0], p2[0], p3[0]], ('Confirmed', 'Recovered', 'Deaths'))
plt.xlabel("Date")
plt.title("Stacked diagram of COVID-19 cases")
Out[108]:
Text(0.5,1,'Stacked diagram of COVID-19 cases')
In [20]:
world_cases_growth['deaths'].values[-1]/world_cases_growth['Count'].values[-1]
Out[20]:
0.040635614908566375

Increase in the trend of people recovering is a very good sign and considering the growth of confirmed cases and # of deaths, mortality rate is still as low as 3.6%

Let us now see the growth of virus in each country

In [21]:
con_cases_growth = confirmed_cases.groupby('Country/Region')[days_columns].agg(np.sum).reset_index()
con_cases_growth.head()
Out[21]:
Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 Afghanistan 0 0 0 0 0 0 0 0 0 ... 4 5 7 7 7 11 16 21 22 22
1 Albania 0 0 0 0 0 0 0 0 0 ... 2 10 12 23 33 38 42 51 55 59
2 Algeria 0 0 0 0 0 0 0 0 0 ... 20 20 20 24 26 37 48 54 60 74
3 Andorra 0 0 0 0 0 0 0 0 0 ... 1 1 1 1 1 1 1 2 39 39
4 Antigua and Barbuda 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 1 1 1 1 1

5 rows × 58 columns

In [22]:
plt.show()

As per the latest data lets see what is the severity of cases in each country

In [23]:
import requests
html_response = requests.get("https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports?_pjax=%23js-repo-pjax-container")
In [24]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_response.content, "html.parser")
In [25]:
a_tags = list(map(lambda x : x.get('href'), soup.findAll(name = 'a', attrs={'class' : 'js-navigation-open '})))
In [26]:
repo_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"+os.path.basename(a_tags[-2])
repo_path
Out[26]:
'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-18-2020.csv'
In [27]:
# daily_data = "csse_covid_19_data\\csse_covid_19_daily_reports"
# daily_data_dir = os.path.join(os.path.dirname(os.getcwd()), daily_data)
# daily_data_files = glob.glob(daily_data_dir+'/*.csv')[-1]
daily_data_files = repo_path
In [28]:
latest_data = pd.read_csv(daily_data_files)
latest_data.head()
Out[28]:
Province/State Country/Region Last Update Confirmed Deaths Recovered Latitude Longitude
0 Hubei China 2020-03-18T12:13:09 67800 3122 56927 30.9756 112.2707
1 NaN Italy 2020-03-18T17:33:05 35713 2978 4025 41.8719 12.5674
2 NaN Iran 2020-03-18T12:33:02 17361 1135 5389 32.4279 53.6880
3 NaN Spain 2020-03-18T13:13:13 13910 623 1081 40.4637 -3.7492
4 NaN Germany 2020-03-18T19:33:02 12327 28 105 51.1657 10.4515
In [123]:
latest_data['Province/State'] = latest_data['Province/State'].fillna(latest_data['Country/Region'])
latest_data['active'] = latest_data['Confirmed'] - (latest_data[['Deaths', 'Recovered']].sum(axis = 1))
In [125]:
agg_latest_data = latest_data.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'active'].agg([np.sum]).reset_index()
agg_latest_data.columns = agg_latest_data.columns.droplevel(1) 
agg_latest_data.head()
Out[125]:
Country/Region Confirmed Deaths Recovered active
0 Afghanistan 22 0 1 21
1 Albania 59 2 0 57
2 Algeria 74 7 12 55
3 Andorra 39 0 1 38
4 Antigua and Barbuda 1 0 0 1
In [126]:
agg_latest_data = agg_latest_data.loc[(agg_latest_data[['Confirmed', 'Deaths', 'Recovered']] > 0).all(axis=1), :].reset_index(drop = True)
In [32]:
manual_dict = {'mainland china' : 'CHN', 'north macedonia' : 'MKD', 'palestine' : 'PSE', 
               'saint barthelemy' : 'FRA', 'south korea' : 'KOR', 'st. martin' : 'PYC', 
               'uk' : 'GBR', 'us' : 'USA', 'vatican city': 'ITA'}
def get_country_code(series):
    df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    df2 = pd.read_csv('countryCodes.csv', sep = '\t')
    country_code_dict = {row[1][0].lower() : row[1][2] for row in df.iterrows()}
    added_dict = {row[1][0].lower() : row[1][2] for row in df2.iterrows()}
#     print(country_code_dict)
    res = []
    for key in series:
        if key.lower() in country_code_dict:
            res.append(country_code_dict.get(key.lower(), None))
        elif key.lower() in added_dict:
            res.append(added_dict.get(key.lower(), None))
        else:
            res.append(manual_dict.get(key.lower(), None))
    return res
In [127]:
agg_latest_data['Code'] = get_country_code(agg_latest_data['Country/Region'])
In [128]:
from scipy.stats import rankdata
In [298]:
fig = px.choropleth(agg_latest_data, locations='Code', color=rankdata(agg_latest_data['active'])/agg_latest_data.shape[0], 
                     hover_data = ['Country/Region', 'active'], 
                     projection="natural earth", color_continuous_scale = 'YlOrRd',
                   title="Count of Active COVID-19 cases in each country")
fig.update_geos(resolution=110,
    showcountries=True, showcoastlines=False,)
# fig.show()
# fig.write_html("images/worldplot.html")
iplot(fig, filename='images/worldplot')

On every day new people get affected, a part of affected people recover and some people die. Let us see what is the count of active cases in each day.

Province/State wise distribution of Active COVID-19 cases in Each country

In [301]:
fig = px.treemap(latest_data, path = ['Country/Region', 'Province/State'], values = np.log(latest_data['active']+1),
                hover_data=['active'], title = "Province/State wise distribution of Active COVID-19 cases in Each country")
fig.update_layout(width=1000, 
                  height=2500)
iplot(fig, filename='images/treemap_active.html')
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning:

invalid value encountered in log

active = confirmed - (recovered + dead)

In [131]:
world_cases_growth['active_cases'] = world_cases_growth['Count'] - (world_cases_growth[['recovered', 'deaths']].sum(axis = 1))
In [299]:
fig = px.scatter(world_cases_growth, x = 'Date', y = 'active_cases', title='Daily count of active cases of COVID 19 through out the world')
# fig.show()
iplot(fig, filename='active_cases')

The number of Active cases have almost doubled with in the span of one week, 55K on 12-Mar and the number went past 120k on 18-Mar

Let us have a look at the list of countries in which Active cases grew. For now we will concentrate on a selected list of countries which have been appearing a lot in News.
In [277]:
selected_countries = ['China', 'Italy', 'France', 'Spain','Germany', 'Iran', 'Korea, South', 'US', 'United Kingdom', 'Switzerland']
# selected_dates = [1]+list(range(4, confirmed_cases.shape[1]))
selected_dates = [1]+list(range(confirmed_cases.shape[1]-14, confirmed_cases.shape[1]))
In [278]:
confirmed_cases_view = confirmed_cases.iloc[:, selected_dates]
confirmed_cases_view = confirmed_cases_view.loc[confirmed_cases_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
confirmed_cases_view.columns = confirmed_cases_view.columns.droplevel(1)
confirmed_cases_view
Out[278]:
Country/Region 3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 China 80537 80690 80770 80823 80860 80887 80921 80932 80945 80977 81003 81033 81058 81102
1 France 380 656 957 1134 1217 1792 2290 2290 3678 4487 4523 6668 7699 9105
2 Germany 482 670 799 1040 1176 1457 1908 2078 3675 4585 5795 7272 9257 12327
3 Iran 3513 4747 5823 6566 7161 8042 9000 10075 11364 12729 13938 14991 16169 17361
4 Italy 3858 4636 5883 7375 9172 10149 12462 12462 17660 21157 24747 27980 31506 35713
5 Korea, South 6088 6593 7041 7314 7478 7513 7755 7869 7979 8086 8162 8236 8320 8413
6 Spain 259 400 500 673 1073 1695 2277 2277 5232 6391 7798 9942 11748 13910
7 Switzerland 114 214 268 337 374 491 652 652 1139 1359 2200 2200 2700 3028
8 US 217 262 402 518 583 959 1281 1663 2179 2727 3499 4632 6421 7783
9 United Kingdom 116 164 207 274 322 384 459 459 802 1144 1145 1551 1960 2642
In [279]:
recovered_cases_view = recovered_cases.iloc[:, selected_dates]
recovered_cases_view = recovered_cases_view.loc[recovered_cases_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
recovered_cases_view.columns = recovered_cases_view.columns.droplevel(1)
recovered_cases_view
Out[279]:
Country/Region 3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 China 52292 53944 55539 57388 58804 60181 61644 62901 64196 65660 67017 67910 68798 69755
1 France 12 12 12 12 12 12 12 12 12 12 12 12 12 12
2 Germany 16 17 18 18 18 18 25 25 46 46 46 67 67 105
3 Iran 739 913 1669 2134 2394 2731 2959 2959 2959 2959 4590 4590 5389 5389
4 Italy 414 523 589 622 724 724 1045 1045 1439 1966 2335 2749 2941 4025
5 Korea, South 41 135 135 118 118 247 288 333 510 510 510 1137 1407 1540
6 Spain 2 2 30 30 32 32 183 183 193 517 517 530 1028 1081
7 Switzerland 3 3 3 3 3 3 4 4 4 4 4 4 4 15
8 US 7 7 7 7 7 8 8 12 12 12 12 17 17 0
9 United Kingdom 8 8 18 18 18 19 19 19 19 19 19 21 53 67
In [280]:
deaths_data_view = deaths_data.iloc[:, selected_dates]
deaths_data_view = deaths_data_view.loc[deaths_data_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
deaths_data_view.columns = deaths_data_view.columns.droplevel(1)
deaths_data_view
Out[280]:
Country/Region 3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 China 3015 3044 3072 3100 3123 3139 3161 3172 3180 3193 3203 3217 3230 3241
1 France 6 9 11 19 19 33 48 48 79 91 91 148 148 148
2 Germany 0 0 0 0 2 2 3 3 7 9 11 17 24 28
3 Iran 107 124 145 194 237 291 354 429 514 611 724 853 988 1135
4 Italy 148 197 233 366 463 631 827 827 1266 1441 1809 2158 2503 2978
5 Korea, South 35 42 44 50 53 54 60 66 66 72 75 75 81 84
6 Spain 3 5 10 17 28 35 54 55 133 195 289 342 533 623
7 Switzerland 1 1 1 2 2 3 4 4 11 13 14 14 27 28
8 US 12 14 17 21 22 28 36 40 47 54 63 85 108 118
9 United Kingdom 1 2 2 3 4 6 8 8 8 21 21 56 56 72
In [281]:
active_data_view = confirmed_cases_view.iloc[:, 1:] - (recovered_cases_view.iloc[:, 1:]+deaths_data_view.iloc[:, 1:])
active_data_view
Out[281]:
3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20
0 25230 23702 22159 20335 18933 17567 16116 14859 13569 12124 10783 9906 9030 8106
1 362 635 934 1103 1186 1747 2230 2230 3587 4384 4420 6508 7539 8945
2 466 653 781 1022 1156 1437 1880 2050 3622 4530 5738 7188 9166 12194
3 2667 3710 4009 4238 4530 5020 5687 6687 7891 9159 8624 9548 9792 10837
4 3296 3916 5061 6387 7985 8794 10590 10590 14955 17750 20603 23073 26062 28710
5 6012 6416 6862 7146 7307 7212 7407 7470 7403 7504 7577 7024 6832 6789
6 254 393 460 626 1013 1628 2040 2039 4906 5679 6992 9070 10187 12206
7 110 210 264 332 369 485 644 644 1124 1342 2182 2182 2669 2985
8 198 241 378 490 554 923 1237 1611 2120 2661 3424 4530 6296 7665
9 107 154 187 253 300 359 432 432 775 1104 1105 1474 1851 2503
In [282]:
import plotly.graph_objects as go
In [300]:
fig = go.Figure()
for i in range(confirmed_cases_view.shape[0]):
    fig.add_trace(go.Scatter(x = confirmed_cases_view.columns[1:], y = active_data_view.iloc[i, 1:], 
                             name = confirmed_cases_view.iloc[i, 0]))
fig.update_layout(title = "Spread of COVID-19 in last 14 days in major Countries", height=800, width = 1000)
iplot(fig, filename='country_spread')

The graph shows how much a nation is prepared for an eidemic, clearly Italy was not expecting such massive outbreak. China on the otherhand showed the world that with proper care this Virus can be contained.